https://stats.stackexchange.com/questions/96972/how-to-interpret-parameters-in-glm-with-family-gamma http://rstudio-pubs-static.s3.amazonaws.com/5691_192685385fc445c9b3fb1619960a20e2.html

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0       ✔ purrr   0.2.5  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.2       ✔ stringr 1.3.1  
## ✔ readr   1.3.1       ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(glue) # for pretty printing
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(tictoc) # for cell/function profiling

GoFundMe Analysis of Mean Donation

projects = read.csv("../../data/processed/gofundme_projects_clean.csv")
projects$month = factor(projects$month)
projects$day_of_week = factor(projects$day_of_week)

In total, we have N=5223 IID samples to work with.

Removed all random variabes except year because they didn’t help explain any variance in the data beyond what the residuals could capture. Year is a reasonable random effect as well (see Variance Components, Searle et al 2006)

Nesting and Chi2 differences: https://www.psychologie.uzh.ch/dam/jcr:ffffffff-b371-2797-0000-00000fda8f29/chisquare_diff_en.pdf

Mean Donation

upper_limit = quantile(projects$mean_donation, 0.995) # remove the top 0.5% of projects, eventually create models with these data included
projects.trunc = projects[projects$mean_donation < upper_limit, ]

projects.trunc %>%
  ggplot() + labs(title="Mean Donation Density") +
  geom_density(aes(mean_donation))

projects.trunc %>%
  ggplot() + geom_qq(aes(sample=mean_donation), distribution = qexp) + geom_qq_line(aes(sample=mean_donation), distribution = qexp)

projects.trunc %>%
  ggplot(aes(goal, mean_donation)) + labs(title="Goal Amount Distribution") +
  geom_point(aes(alpha=0.1)) +
  theme_minimal()

projects.trunc %>%
  ggplot(aes(duration_float, mean_donation)) + labs(title="Duration Distribution") +
  geom_point(aes(alpha=0.1)) +
  theme_minimal()

projects.trunc %>%
  ggplot(aes(text_length_words, mean_donation)) + labs(title="Text Length Distribution") +
  geom_point(aes(alpha=0.1)) +
  theme_minimal()

projects.trunc %>%
  ggplot(aes(photos, mean_donation)) + labs(title="Photos Distribution") +
  geom_point(aes(alpha=0.1)) +
  theme_minimal()

projects.trunc %>%
  ggplot(aes(updates, mean_donation)) + labs(title="Updates Distribution") +
  geom_point(aes(alpha=0.1)) + 
  theme_minimal()

projects.trunc %>%
  ggplot(aes(friends, mean_donation)) + labs(title="FB Friends Distribution") +
  geom_point(aes(alpha=0.1)) + 
  theme_minimal()

projects.trunc %>%
  ggplot(aes(shares, mean_donation)) + labs(title="FB Shares Distribution") +
  geom_point(aes(alpha=0.1)) + 
  theme_minimal()

projects.trunc %>%
  ggplot(aes(cancer_type, mean_donation)) + labs(title="Cancer Types") +
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.x=element_text(angle = 60, hjust=1))

The data look like a Gamma, so we model with Gamma and a log link (see above answer for reason). It might be nice

Ideally, I could run a distributional test and confirm (or reject) this intuition.

Establish the base formula to build models off of:

base.formula = mean_donation ~ shares_sc + friends_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + duration_float_sc + cancer_type + month + day_of_week + (1|year)

Create a helper function to easily compare two models and pretty print the output:

compare = function(model.old, model.new, key, multiplier=1.0) {
  a = anova(model.old, model.new)
  p = a$`Pr(>Chisq)`[2]
  print(glue("Variable: {key}"))
  print(glue("DF diff: {a$`Chi Df`[2]}\t New AIC: {round(a$AIC[2], 1)}\t\t Chisq: {round(a$Chisq[2],1)}\t P(>chisq) = {round(p,4)} {if (p < 0.05) '(significant)' else ''})"))
  effect = as.numeric(summary(mod)$coefficients[key, ])
  print(glue("beta: {round(effect[1],4)} (+/- {round(effect[2],3)})\t stat: {round(effect[3], 2)} => P(>|z|) = {round(effect[4], 4)}"))
  print(glue("rate ratio: {round((exp(effect[1]*multiplier) - 1)*100, 1)}% ({round((exp((effect[1] - effect[2])*multiplier) - 1)*100, 1)}% to {round((exp((effect[1] + effect[2])*multiplier) - 1)*100, 1)}%)"))
  print(glue(""))
}

https://stats.stackexchange.com/questions/96972/how-to-interpret-parameters-in-glm-with-family-gamma

Key to interpretation is the log link used here

To put the results from the continuous predictors on interpretable scales, we use a multiplier within the exponent (see factor in compare()). For example, if the multiplier is 1, this leads to: for a one unit increase in this predictor, we see an x-fold increase in the DV.

p = function(var, scl=1, digits=6, mult=1.00, mask=T) {
  mu = mean(projects[mask, var])
  sd = sd(projects[mask, var])
  print(glue("Var: {var}\t Mean: {round(mu,digits)*scl}\t SD: {round(sd,digits)*scl}\t Scale (x{round(mult, 2)}): {paste(round(mult*sd + mu, digits)*scl)}"))
}
p("journey_salience", scl = 1000, digits = 5, mult = 0.6)
## Var: journey_salience     Mean: 0.52  SD: 1.68    Scale (x0.6): 1.53
p("battle_salience", scl = 1000, digits = 5, mult = 0.16)
## Var: battle_salience  Mean: 3.87  SD: 6.27    Scale (x0.16): 4.87
p("journey_prod", digits = 4, mask = projects$journey_metaphor > 0)
## Var: journey_prod     Mean: 1.5567    SD: 1.0602  Scale (x1): 2.6169
p("battle_prod", digits = 4, mask = projects$battle_metaphor > 0)
## Var: battle_prod  Mean: 3.4581    SD: 3.0339  Scale (x1): 6.492
p("first_instantiation", digits = 4, mask = projects$journey_metaphor > 0, mult = 1.6)
## Var: first_instantiation  Mean: 0.3943    SD: 0.3191  Scale (x1.6): 0.9049
p("first_instantiation", digits = 4, mask = projects$battle_metaphor > 0, mult = 1.6)
## Var: first_instantiation  Mean: 0.3952    SD: 0.3111  Scale (x1.6): 0.8929

Any Metaphor

Examine: does having any instance of metaphor influence mean donation?

Null: having any metaphor is not significantly different from having no metaphor

projects.trunc %>%
  ggplot() + theme_minimal() +
  geom_violin(aes(any_metaphor, mean_donation, fill=any_metaphor)) + 
  scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) + 
  geom_hline(yintercept = mean(projects.trunc[projects.trunc$any_metaphor == F, ]$mean_donation), lty=2, color="red") + 
  geom_hline(yintercept = mean(projects.trunc[projects.trunc$any_metaphor, ]$mean_donation), lty=2, color="blue")

mean.mod.base = glmer(base.formula, data = projects.trunc, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0321244
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula,  ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - photos_sc)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - month)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00125067
## (tol = 0.001, component 1)
# summary(mean.mod)

Results

# m2 = glm(status ~ shares_sc + friends_sc + updates_sc + goal_sc + text_length_words_sc + cancer_type + scale(journey_salience), data = projects.trunc.jor, family = "binomial")
# 
# m1 = glm(status ~ shares_sc + friends_sc + updates_sc + goal_sc + text_length_words_sc + cancer_type, data = projects.trunc.jor, family = "binomial")
# anova(m1,m2, test="Chisq")
# d = data.frame(name = names(coef(m2)), mu = as.numeric(coef(m2)))
# d$LB = ci[as.character(d$name), 1]
# d$UB = ci[as.character(d$name), 2]
# d$sig = 0 > d$UB | 0 < d$LB
# d = d[d$name != "(Intercept)", ]
# d
# ggplot(d) +
#   geom_bar(aes(x=name, y=mu, fill=sig), stat="identity") + 
#   geom_point(aes(x=name, y=LB), color="red") +
#   geom_point(aes(x=name, y=UB), color="blue") +
#   coord_flip()
mod = glmer(update(f,  ~ . + any_metaphor), data = projects.trunc, family = Gamma(link = "log"))
compare(mean.mod, mod, "any_metaphorTRUE")
## Variable: any_metaphorTRUE
## DF diff: 1    New AIC: 52318.9        Chisq: 39.9     P(>chisq) = 0 (significant))
## beta: 0.0802 (+/- 0.013)  stat: 6.34 => P(>|z|) = 0
## rate ratio: 8.4% (7% to 9.7%)
mod = glmer(update(base.formula,  ~ . + any_metaphor), data = projects.trunc, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0422402
## (tol = 0.001, component 1)
compare(mean.mod, mod, "any_metaphorTRUE")
## Variable: any_metaphorTRUE
## DF diff: 20   New AIC: 52337.2        Chisq: 59.7     P(>chisq) = 0 (significant))
## beta: 0.08 (+/- 0.013)    stat: 6.32 => P(>|z|) = 0
## rate ratio: 8.3% (7% to 9.7%)

Both Metaphors

Examine: does having both metaphor families present influence mean donation?

Null: having both metaphor families is not significantly different from having no metaphor

projects.trunc.both = projects.trunc[(1-projects.trunc$any_metaphor) | projects.trunc$both_metaphor, ] # only projects with both or none

N = 2790

projects.trunc.both %>%
  ggplot() + theme_minimal() +
  geom_violin(aes(both_metaphor, mean_donation, fill=both_metaphor)) + 
  scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) + 
  geom_hline(yintercept = mean(projects.trunc.both[projects.trunc.both$both_metaphor == F, ]$mean_donation), lty=2, color="red") + 
  geom_hline(yintercept = mean(projects.trunc.both[projects.trunc.both$both_metaphor, ]$mean_donation), lty=2, color="blue")

Perform model selection to prevent overfitting:

mean.mod.base = glmer(base.formula, data = projects.trunc.both, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0414938
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula,  ~ . - month)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - photos_sc)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.both, family = Gamma(link = "log"))
# summary(mean.mod)

Results

mod = glmer(update(f,  ~ . + both_metaphor), data = projects.trunc.both, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00146278
## (tol = 0.001, component 1)
compare(mean.mod, mod, "both_metaphorTRUE")
## Variable: both_metaphorTRUE
## DF diff: 1    New AIC: 27989.8        Chisq: 7.7  P(>chisq) = 0.0054 (significant))
## beta: 0.0709 (+/- 0.026)  stat: 2.77 => P(>|z|) = 0.0056
## rate ratio: 7.4% (4.6% to 10.1%)
mod = glmer(update(base.formula,  ~ . + both_metaphor), data = projects.trunc.both, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0380102
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "both_metaphorTRUE")
## Variable: both_metaphorTRUE
## DF diff: 1    New AIC: 28016.8        Chisq: 8.6  P(>chisq) = 0.0033 (significant))
## beta: 0.0747 (+/- 0.026)  stat: 2.91 => P(>|z|) = 0.0037
## rate ratio: 7.8% (5% to 10.6%)

Some Metaphor

Examine: does having only journey/battle, or dominant journey/battle, influence mean donation?

Null: having only journey/battle, or dominant journey/battle, is not significantly different from negative

If journey is not the dominant metaphor, that means battle is or there are equal amounts of each family If journey is not the only metaphor family, that means there is at least some battle metaphor

Dominant Journey: Null: There is no difference in outcomes when journey dominates compared to when battle dominates or there is no dominant family.

Dominant Battle: Null: There is no difference in outcomes when battle dominates compared to when journey dominates or there is no dominant family.

Only journey: Null: There is no difference in outcomes when journey is the only metaphor family present compared to when there is also battle metaphor.

projects.trunc.some = projects.trunc[projects.trunc$any_metaphor, ]

N = 2842

p1 = projects.trunc.some %>%
  ggplot() + theme_minimal() +
  geom_violin(aes(dom_journey, mean_donation, fill=dom_journey)) + 
  scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_journey == F, ]$mean_donation), lty=2, color="red") + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_journey, ]$mean_donation), lty=2, color="blue")

p2 = projects.trunc.some %>%
  ggplot() + theme_minimal() +
  geom_violin(aes(dom_battle, mean_donation, fill=dom_battle)) + 
  scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_battle == F, ]$mean_donation), lty=2, color="red") + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_battle, ]$mean_donation), lty=2, color="blue")

p3 = projects.trunc.some %>%
  ggplot() + theme_minimal() +
  geom_violin(aes(only_journey, mean_donation, fill=only_journey)) + 
  scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_journey == F, ]$mean_donation), lty=2, color="red") + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_journey, ]$mean_donation), lty=2, color="blue")

p4 = projects.trunc.some %>%
  ggplot() + theme_minimal() +
  geom_violin(aes(only_battle, mean_donation, fill=only_battle)) + 
  scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_battle == F, ]$mean_donation), lty=2, color="red") + 
  geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_battle, ]$mean_donation), lty=2, color="blue")

grid.arrange(p1, p2, p3, p4, nrow=1)

Perform model selection to prevent overfitting:

mean.mod.base = glmer(base.formula, data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.051572
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula,  ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.some, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.some, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0412533
## (tol = 0.001, component 1)
# summary(mean.mod)

Results

mod = glmer(update(f,  ~ . + dom_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0623054
## (tol = 0.001, component 1)
compare(mean.mod, mod, "dom_journeyTRUE")
## Variable: dom_journeyTRUE
## DF diff: 1    New AIC: 28671.9        Chisq: 1.4  P(>chisq) = 0.2288 )
## beta: 0.0301 (+/- 0.025)  stat: 1.2 => P(>|z|) = 0.2297
## rate ratio: 3.1% (0.5% to 5.7%)
mod = glmer(update(f,  ~ . + dom_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.064289
## (tol = 0.001, component 1)
compare(mean.mod, mod, "dom_battleTRUE")
## Variable: dom_battleTRUE
## DF diff: 1    New AIC: 28672.8        Chisq: 0.6  P(>chisq) = 0.4524 )
## beta: -0.0162 (+/- 0.021)     stat: -0.75 => P(>|z|) = 0.4513
## rate ratio: -1.6% (-3.7% to 0.5%)
mod = glmer(update(f,  ~ . + only_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0617672
## (tol = 0.001, component 1)
compare(mean.mod, mod, "only_journeyTRUE")
## Variable: only_journeyTRUE
## DF diff: 1    New AIC: 28672.7        Chisq: 0.7  P(>chisq) = 0.4037 )
## beta: 0.023 (+/- 0.027)   stat: 0.84 => P(>|z|) = 0.4002
## rate ratio: 2.3% (-0.4% to 5.2%)
mod = glmer(update(f,  ~ . + only_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0621089
## (tol = 0.001, component 1)
compare(mean.mod, mod, "only_battleTRUE")
## Variable: only_battleTRUE
## DF diff: 1    New AIC: 28672.8        Chisq: 0.6  P(>chisq) = 0.4495 )
## beta: -0.0143 (+/- 0.019)     stat: -0.76 => P(>|z|) = 0.4443
## rate ratio: -1.4% (-3.2% to 0.4%)
mod = glmer(update(base.formula,  ~ . + dom_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.062997
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "dom_journeyTRUE")
## Variable: dom_journeyTRUE
## DF diff: 1    New AIC: 28681.6        Chisq: 1.5  P(>chisq) = 0.223 )
## beta: 0.0307 (+/- 0.025)  stat: 1.22 => P(>|z|) = 0.2212
## rate ratio: 3.1% (0.6% to 5.7%)
mod = glmer(update(base.formula,  ~ . + dom_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.075027
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "dom_battleTRUE")
## Variable: dom_battleTRUE
## DF diff: 1    New AIC: 28682.5        Chisq: 0.5  P(>chisq) = 0.4672 )
## beta: -0.0155 (+/- 0.021)     stat: -0.72 => P(>|z|) = 0.4713
## rate ratio: -1.5% (-3.6% to 0.6%)
mod = glmer(update(base.formula,  ~ . + only_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0546125
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "only_journeyTRUE")
## Variable: only_journeyTRUE
## DF diff: 1    New AIC: 28682.4        Chisq: 0.7  P(>chisq) = 0.4044 )
## beta: 0.0232 (+/- 0.027)  stat: 0.85 => P(>|z|) = 0.3964
## rate ratio: 2.3% (-0.4% to 5.2%)
mod = glmer(update(base.formula,  ~ . + only_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0547341
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "only_battleTRUE")
## Variable: only_battleTRUE
## DF diff: 1    New AIC: 28682.6        Chisq: 0.5  P(>chisq) = 0.4817 )
## beta: -0.0131 (+/- 0.019)     stat: -0.71 => P(>|z|) = 0.4807
## rate ratio: -1.3% (-3.1% to 0.6%)

Journey

projects.trunc.jor = projects.trunc[projects.trunc$journey_metaphor > 0, ]

N = 693

p1 = projects.trunc.jor %>%
  ggplot(aes(scale(journey_salience), mean_donation)) + theme_minimal() +
  geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")

p2 = projects.trunc.jor %>%
  ggplot(aes(scale(journey_prod), mean_donation)) + theme_minimal() +
  geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")

p3 = projects.trunc.jor %>%
  ggplot(aes(scale(first_instantiation), mean_donation)) + theme_minimal() +
  geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")

grid.arrange(p1, p2, p3, nrow=1)

Perform model selection to prevent overfitting:

mean.mod.base = glmer(base.formula, data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00287654
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula,  ~ . - text_length_words_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - updates_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - cancer_type)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - photos_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - month)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.jor, family = Gamma(link = "log"))
# summary(mean.mod)

Results

mod = glmer(update(f,  ~ . + scale(journey_salience)), data = projects.trunc.jor, family = Gamma(link = "log"))
compare(mean.mod, mod, "scale(journey_salience)", 0.6)
## Variable: scale(journey_salience)
## DF diff: 1    New AIC: 6954.3         Chisq: 2.3  P(>chisq) = 0.1319 )
## beta: 0.0223 (+/- 0.015)  stat: 1.49 => P(>|z|) = 0.1349
## rate ratio: 1.3% (0.4% to 2.3%)
mod = glmer(update(f,  ~ . + scale(journey_prod)), data = projects.trunc.jor, family = Gamma(link = "log"))
compare(mean.mod, mod, "scale(journey_prod)")
## Variable: scale(journey_prod)
## DF diff: 1    New AIC: 6948.5         Chisq: 8.1  P(>chisq) = 0.0045 (significant))
## beta: 0.0422 (+/- 0.015)  stat: 2.79 => P(>|z|) = 0.0052
## rate ratio: 4.3% (2.7% to 5.9%)
mod = glmer(update(f,  ~ . + scale(first_instantiation)), data = projects.trunc.jor, family = Gamma(link = "log"))
compare(mean.mod, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1    New AIC: 6955.5         Chisq: 1    P(>chisq) = 0.3109 )
## beta: -0.0154 (+/- 0.015)     stat: -1.01 => P(>|z|) = 0.3103
## rate ratio: -1.5% (-3% to 0%)
mod = glmer(update(base.formula,  ~ . + scale(journey_salience)), data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00422534
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(journey_salience)", 0.6)
## Variable: scale(journey_salience)
## DF diff: 1    New AIC: 6987       Chisq: 3.5  P(>chisq) = 0.0625 )
## beta: 0.0325 (+/- 0.018)  stat: 1.84 => P(>|z|) = 0.0651
## rate ratio: 2% (0.9% to 3%)
mod = glmer(update(base.formula,  ~ . + scale(journey_prod)), data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00582552
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(journey_prod)")
## Variable: scale(journey_prod)
## DF diff: 1    New AIC: 6983.3         Chisq: 7.1  P(>chisq) = 0.0076 (significant))
## beta: 0.0434 (+/- 0.016)  stat: 2.64 => P(>|z|) = 0.0082
## rate ratio: 4.4% (2.7% to 6.2%)
mod = glmer(update(base.formula,  ~ . + scale(first_instantiation)), data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00550599
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1    New AIC: 6990       Chisq: 0.5  P(>chisq) = 0.4774 )
## beta: -0.0109 (+/- 0.015)     stat: -0.71 => P(>|z|) = 0.4762
## rate ratio: -1.1% (-2.6% to 0.4%)

Battle

projects.trunc.bat = projects.trunc[projects.trunc$battle_metaphor > 0, ]

N = 2585

p1 = projects.trunc.bat %>%
  ggplot(aes(scale(battle_salience), mean_donation)) + theme_minimal() +
  geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")

p2 = projects.trunc.bat %>%
  ggplot(aes(scale(battle_prod), mean_donation)) + theme_minimal() +
  geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")

p3 = projects.trunc.bat %>%
  ggplot(aes(scale(first_instantiation), mean_donation)) + theme_minimal() +
  geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")

grid.arrange(p1, p2, p3, nrow=1)

Perform model selection to prevent overfitting:

mean.mod.base = glmer(base.formula, data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0698876
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula,  ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.bat, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.bat, family = Gamma(link = "log")), test="Chisq")
f = update(f,  ~ . - text_length_words_sc)
# drop1(glmer(f, data = projects.trunc.bat, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0505245
## (tol = 0.001, component 1)
# summary(mean.mod)

Results

mod = glmer(update(f,  ~ . + scale(battle_salience)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0517634
## (tol = 0.001, component 1)
compare(mean.mod, mod, "scale(battle_salience)", 0.16)
## Variable: scale(battle_salience)
## DF diff: 1    New AIC: 26050.9        Chisq: 4.7  P(>chisq) = 0.0306 (significant))
## beta: -0.018 (+/- 0.008)  stat: -2.17 => P(>|z|) = 0.03
## rate ratio: -0.3% (-0.4% to -0.2%)
mod = glmer(update(f,  ~ . + scale(battle_prod)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0825706
## (tol = 0.001, component 1)
compare(mean.mod, mod, "scale(battle_prod)")
## Variable: scale(battle_prod)
## DF diff: 1    New AIC: 26055.1        Chisq: 0.5  P(>chisq) = 0.4942 )
## beta: 0.0056 (+/- 0.008)  stat: 0.68 => P(>|z|) = 0.4955
## rate ratio: 0.6% (-0.3% to 1.4%)
mod = glmer(update(f,  ~ . + scale(first_instantiation)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.036784
## (tol = 0.001, component 1)
compare(mean.mod, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1    New AIC: 26055.5        Chisq: 0.1  P(>chisq) = 0.721 )
## beta: 0.0028 (+/- 0.008)  stat: 0.34 => P(>|z|) = 0.735
## rate ratio: 0.3% (-0.5% to 1.1%)
mod = glmer(update(base.formula,  ~ . + scale(battle_salience)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0506704
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(battle_salience)", 0.16)
## Variable: scale(battle_salience)
## DF diff: 1    New AIC: 26059.9        Chisq: 2.9  P(>chisq) = 0.0875 )
## beta: -0.0154 (+/- 0.009)     stat: -1.7 => P(>|z|) = 0.0897
## rate ratio: -0.2% (-0.4% to -0.1%)
mod = glmer(update(base.formula,  ~ . + scale(battle_prod)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0475429
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(battle_prod)")
## Variable: scale(battle_prod)
## DF diff: 1    New AIC: 26062.7        Chisq: 0.1  P(>chisq) = 0.7882 )
## beta: 0.0023 (+/- 0.008)  stat: 0.27 => P(>|z|) = 0.7845
## rate ratio: 0.2% (-0.6% to 1.1%)
mod = glmer(update(base.formula,  ~ . + scale(first_instantiation)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0438204
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1    New AIC: 26062.5        Chisq: 0.3  P(>chisq) = 0.5764 )
## beta: 0.0046 (+/- 0.008)  stat: 0.56 => P(>|z|) = 0.5749
## rate ratio: 0.5% (-0.4% to 1.3%)